% Created 2019-07-14 日 17:41
% Intended LaTeX compiler: pdflatex
\documentclass[11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{textcomp}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{minted}
\input{preamble.tex}
\graphicspath{{../../images/NumericalAnalysis/}}
\author{gouziwu}
\date{\today}
\title{Numerical Analysis}
\hypersetup{
 pdfauthor={gouziwu},
 pdftitle={Numerical Analysis},
 pdfkeywords={},
 pdfsubject={},
 pdfcreator={Emacs 26.2 (Org mode 9.2.4)}, 
 pdflang={English}}
\begin{document}

\maketitle
\tableofcontents


\section{Chap1 Mathematical Preliminaries}
\label{sec:orgc7383e6}
\subsection{1.2 Roundoff Errors and Computer Arithmetic}
\label{sec:orgdbb3acc}
\textbf{Truncation Error} : the error involved in using a truncated, or finite, summation to
approximate the sum of an infinite series 

\textbf{Roundoff Error}: the error produced when performing real number calculations.
It occurs because the arithmetic performed in a machine involves numbers
with only a finite number of digits. 


Suppose \(y=\textcolor{blue}{0.d_1d_2\dots
   d_k}d_{k+1}d_{k+2}\dots\textcolor{blue}{\times 10^n{}}\), then

\(fl(y)=\begin{cases} 0.d_1d_2\dots d_k\times 10^n&\quad\text{chopping}\\
   chop(y+5\times 10^{n-(k+1)})=0.\delta_1\delta_2\dots \delta_k\times
   10^n&\quad\text{Rounding}\\\end{cases}\)


\begin{definition}
If $p^*$ is an approximation to $p$, the \textcolor{red}{absolute error} is $|p-p^*|$,
and the \textcolor{red}{relative error} is $\frac{|p-p^*|}{|p|}$, provided that $p\neq 0$
\end{definition}

\begin{definition}
The number $p^*$ is said to approximate $p$ to $t$
\textcolor{red}{significant digits} if $t$ is the largest nonnegative
integer for which $\frac{|p-p^*|}{|p|}<5\times 10^{-t}$
\end{definition}

\begin{description}
\item[{chopping}] \(|\frac{y-fl(y)}{y}|=|\frac{0.d_1d_2\dots d_kd_{k+1}\dots
                 \times 10^n-0.d_1d_2\dots d_k\times 10^n}{0.d_1d_2\dots
                 d_kd_{k+1}\times
                 10^n}|=|\frac{0.d_{k+1}\dots}{0.d_1d_2\dots}|\times 10^{-k}\le
                 \frac{1}{0.1}\times 10^{-k}=10^{-k+1}\)
\item[{rounding}] \(|\frac{y-fl(y)}{y}|\le \frac{0.5}{0.1}\times 10^{-k}=0.5\times
                 10^{-k+1}\)
\end{description}


\textcolor{red}{Subtraction of nearly equal numbers will
cause a cancellation of significant digits.}


\textcolor{red}{Dividing by a number with small magnitude (or,
equivalently, multiplying by a number with large magnitude) will
cause an enlargement of the error.}

\textbf{Finite digit arithmetic}

\begin{itemize}
\item \(x\oplus y=fl(fl(x)+fl(y))\)
\item \(x\otimes y=fl(fl(x)\times fl(y))\)
\item \(x\ominus y=fl(fl(x)-fl(y))\)
\item \(x\odiv y=fl(fl(x)\div fl(y))\)
\end{itemize}

\subsection{1.3 Algorithms and Convergence}
\label{sec:orgdb8a87b}
An algorithm that satisfies that small changes in the initial data produce
correspondingly small changes in the final results is called \textbf{stable};
otherwise it is \textbf{unstable}. An algorithm is called \textbf{conditionally stable} if it
is stable only for certain choices of initial data. 

Suppose that \(E_0 > 0\) denotes an initial error and \(E_n\) represents the magnitude
of an error after n subsequent operations. If \(E_n\approx CnE_0\), where C is a
constant independent of n, then the growth of error is said to be \textbf{linear}. If
\(E_n\approx C^nE_0\), for some C > 1, then the growth of error is called \textbf{exponential} 

Suppose \(\{\beta_n\}_{n=1}^\infty, \lim\limits_{n \to \infty}\beta_n=0,
   \{\alpha_n\}_{n=1}^\infty, \lim\limits_{n\to\infty}\alpha_n=\alpha\).
If a positive constant K exists with \(|\alpha_n-\alpha|\le K|\beta_n|\) for
large n, then \(\{\alpha_n\}_{n=1}^\infty\) converges to \(\alpha\) with \textbf{rate, or}
\textbf{order, of convergence} \(O(\beta_n)\)

Suppose \(\lim\limits_{h\to 0}G(h)=0, \lim\limits_{h\to 0}F(h)=L\) and
\(|F(h)-L|\le K|G(h)|\) for sufficiently small h, then we write
\(F(h)=L+O(G(h))\)
\section{Chap2 Solutions of equations in one variable}
\label{sec:org6b296c8}
\subsection{2.1 Bisection method}
\label{sec:orga87bb52}
\begin{theorem}[Intermediate Value Theorem]
If $f\in C[a,b]$, $K\in(f(a), f(b))$, then there exists a number $p\in(a,b)$
for which $f(p)=K$
\end{theorem}

\begin{theorem}
Suppose that $f\in C[a,b]$ and $f(a)\cdot f(b)<0$. The bisection method
generates a sequence $\{p_n\},n=0,1,\dots$ approximating a zero $p$ of $f$ with
\begin{equation*}
|p_n-p|\le\frac{b-a}{2^n}, \quad\text{when } n\ge 1
\end{equation*}
\end{theorem}
\subsection{2.2 Fixed-Point Iteration}
\label{sec:orgf51239e}
\(f(x)=0\xleftrightarrow{\text{equivalent}} x=f(x)+x=g(x)\)

\begin{theorem}[Fixed-Point Theorem]
Let $g\in C[a,b]$ be s.t. $g(x)\in[a,b]$ for all $x\in[a,b]$. Suppose that
$g'$ exists on $(a,b)$ and that a constant $0<k<1$ exists with $|g'(x)|\le k$
for all $x\in(a,b)$ (hence $g'$ can't converge to 1). Then for any number
$p_0$ in $[a,b]$, the sequence defined by $p_n=g(p_{n-1}), n\ge 1$ converges
to the unique point $p$ in $[a,b]$
\end{theorem}

\begin{corollary}
$|p_n-p|\le\frac{1}{1-k}|p_{n+1}-p_n|$ and
$|p_n-p|\le\frac{k^n}{1-k}|p_1-p_0|$
\end{corollary}
\subsection{2.3 Newton's method}
\label{sec:orge17fa88}
Linearize a nonlinear function using \textbf{Taylor's expansion}

Let \(p_0\in [a,b]\) be an approximation to \(p\) s.t. \(f'(p_0)\neq 0\), hence 
\(f(x)=f(p_0)+f'(p_0)(x-p_0)+\frac{f''(\xi_x)}{2!}(x-p_0)^2\), then
\(0=f(p)\approx f(p_0)+f'(p_0)(p-p_0)\rightarrow p\approx
   p_0-\frac{f(p_0)}{f'(p_0)}\)
\(p_n=p_{n-1}-\frac{f(p_{n-1})}{f'(p_{n-1})},\quad\text{for } n\ge 1\)

\begin{theorem}
Let $f\in C^2[a,b]$. If $p\in[a,b]$ is s.t. $f(p)=0,f'(p)\neq0$, then there
exists a $\delta>0$ s.t. Newton's method generates a sequence $\{p_n\},
n\in\mathbb{N}\setminus\{0\}$ converging to $p$ for any initial approximation
$p_0\in[p-\delta,p+\delta]$.
\end{theorem}
\subsection{2.4 Error analysis for iterative methods}
\label{sec:orgcb80e2d}
\begin{definition}
Suppose $\{p_n\}(n=0,1,\dots)$ is a sequence that converges to $p$ with
$p_n\neq p$ for all $n$. If positive constants $\alpha$ and $\lambda$ exist
with
\begin{equation*}
\lim\limits_{n\to\infty}\frac{|p_{n+1}-p|}{|p_n-p|^\alpha}=\lambda
\end{equation*}
then $\{p_n\}(n=0,1,\dots)$ \textcolor{red}{converges to p of order
$\alpha$, with asymptotic error constant $\lambda$}
\end{definition}

\begin{theorem}
Let $p$ be a fixed point of $g(x)$. If there exists some constant $\alpha\ge
2$ s.t. $g\in C^\alpha[p-\delta,p+\delta]$,
\textcolor{red}{$g'(p)=\dots=g^{(\alpha-1)}(p)=0$} and \textcolor{red}{$g^{(\alpha)}(p)\neq 0$}.
Then the iterations with $p_n=g(p_{n-1})$, $n\ge1$ is of \textcolor{red}{order $\alpha$}
\end{theorem}

\begin{equation*}
p_{n+1}=g(p_n)=g(p)+g'(p)(p_n-p)+\dots+\frac{g^{(\alpha)}(\xi_n)}{\alpha!}(p_n-p)^\alpha
\end{equation*}

\begin{theorem}
Let $g\in C[a,b]$ be s.t. $g(x)\in[a,b]$ for all $x\in[a,b]$. Suppose in
addition that $g'$ is continuous on $(a,b)$ and a positive constant $k<1$
exists with
\begin{equation*}
|g'(x)|\le k, \quad \text{for all } x\in(a,b)
\end{equation*}
If $g'(p)\neq0$, then for any number $p_0\neq p$ in $[a,b]$, the sequence
\begin{equation*}
p_n=g(p_{n-1}),\quad\text{for }n\ge 1
\end{equation*}
converges only linearly to the unique fixed point in $[a,b]$
\end{theorem}

\begin{proof}
\begin{align*}
\lim\limits_{n\to\infty}\frac{|p_{n+1}-p|}{|p_n-p|}&=
\lim\limits_{n\to\infty}\frac{|g(p_n)-p|}{|p_n-p|}\\
&=\lim\limits_{n\to\infty}\frac{|g'(\xi)(p_n-p)|}{|p_n-p|}\\
&=|g'(p)|
\end{align*}
\end{proof}

\begin{theorem}
Let $p$ be a solution of the equation $x=g(x)$. Suppose that $g'(p)=0$ and
g'' is continuous with $|g''(x)|<M$ on an open interval $I$ containing $p$.
Then there exists a $\delta>0$ s.t. for $p_0\in[p-\delta,p+\delta]$, the
sequence defined by $p_n=g(p_{n-1})$, when $n\ge 1$ converges at least
quadratically to $p$. Moreover, for sufficiently large values of $n$,
\begin{equation*}
|p_{n+1}-p|<\frac{M}{2}|p_n-p|^2
\end{equation*}
\end{theorem}

\begin{proof}
Choose $k\in(0,1),\delta>0$ s.t. $[p-\delta,p+\delta]\subseteq I$ and
$|g'(x)|<k$ and $g''$ is continuous.
\begin{equation*}
g(x)=g(p)+g'(p)(x-p)+\frac{g''(\xi)}{2}(x-p)^2
\end{equation*}
Hence $g(x)=p+\frac{g''(\xi)}{2}(x-p)^2$.
$p_{n+1}=g(p_n)=p+\frac{g''(\xi_n)}{2}(p_n-p)^2$. Thus
$p_{n+1}-p=\frac{g''(\xi_n)}{2}(p_n-p)^2$. We get
\begin{equation*}
\lim\limits_{n\to\infty}\frac{|p_{n+1}-p|}{|p_n-p|^2}=\frac{|g''(p)|}{2}
\end{equation*}
\end{proof}

\begin{definition}
A solution $p$ of $f(x) = 0$ is a \textcolor{red}{zero of multiplicity} $m$
of $f$ if for $x\neq p$, $f(x)=(x-p)^mq(x)$ where $\lim\limits_{x\to
p}q(x)\neq 0$
\end{definition}

\begin{theorem}
The function $f\in C^m[a,b]$ has a zero of multiplicity $m$ at $p$ in $(a,b)$
if and only if
\begin{equation*}
0=f(p)=f'(p)=\dots=f^{(m-1)}(p),\quad\text{but } f^{(m)}(p)\neq 0
\end{equation*}
\end{theorem}

To handle the problem of multiple roots of a function \(f\) is to define
\(\mu(x)=\frac{f(x)}{f'(x)}\).

If p is a zero of f of multiplicity m with \(f(x)=(x-p)^mq(x
   )\), then
\begin{align*}
\mu(x)&=\frac{(x-p)^mq(x)}{m(x-p)^{m-1}q(x)+(x-p)^mq'(x)}\\
&=(x-p)\frac{q(x)}{mq(x)+(x-p)q'(x)}
\end{align*}
And \(q(x)\neq 0\).

Now Newton's method:
\begin{align*}
g(x)&=x-\frac{\mu(x)}{\mu'(x)}\\
&=x-\frac{f(x)/f'(x)}{(f'(x)^2-f(x)f''(x))/f'(x)^2}\\
&=x-\frac{f(x)f'(x)}{f'(x)^2-f(x)f''(x)}
\end{align*}
\section{Chap3 Interpolation and polynomial approximation}
\label{sec:org01bca41}
\subsection{3.1 Interpolation and the Lagrange polynomial}
\label{sec:orgefa9bb8}
\(P_n(x)=\displaystyle\sum_{i=0}^nL_{n,i}(x)y_i\). Find \(L_{n,i}(x)\) for
\(i=0,\dots,n\) s.t. \(L_{n,i}(x_j)=\delta_{ij}\), where \(\delta_{ij}\) is the
Kronecker delta.
Each \(L_{n,i}\) has n roots \(x_0,\dots,\hat{x_i},\dots,x_n\).
\(L_{n,i}(x)=C_i(x-x_0)\dots\hat{(x-x_i)}\dots(x-x_n)=C_i \displaystyle
   \prod_{\substack{j\neq i\\j=0}}^n(x-x_j)\).
\(L_{n,i}(x_i)=1\to C_i=\displaystyle\prod_{j\neq i}\frac{1}{x_i-x_j}\).
Hence \(L_{n,i}(x)=\displaystyle\prod_{\substack{j\neq i\\j=0}}^n
   \frac{x-x_j}{x_i-x_j}\)

\begin{theorem}
If $x_0,x_1,\dots,x_n$ are n+1 distinct numbers and $f$ is a function whose values
are given at these numbers, then the n-th Lagrange interpolating polynomial 
is unique
\end{theorem}


\textbf{Analyze the remainder}. Suppose \(a\le x_0<x_1<\dots<x_n\le b\) and \(f\in
   C^{n+1}[a,b]\). Consider \(R_n(x)=f(x)-P_n(x)\).
\(R_n(x)\) has at least n+1 roots =>
\(R_n(x)=K(x)\displaystyle\prod_{i=0}^n(x-x_i)\).
For any \(x\neq x_i\). Define
\(g(t)=R_n(t)-K(x)\displaystyle\prod_{i=0}^n(t-x_i)\). \(g(t)\) has n+2 distinct
roots \(x_0,\dots,x_n,x\). Hence \(g^{(n+1)}(\xi_x)=0\) for some \(\xi_x\in(a,b)\).
Since \(P_n\) has degree n,
\(0=g^{(n+1)}(\xi_x)=f^{(n+1)}(\xi_x)-P_n^{(n+1)}(\xi_x)-K(x)(n+1)!=f^{(n+1)}(\xi_x)-K(x)(n+1)!\).
Thus
\(R_n(x)=\frac{f^{(n+1)}(\xi_x)}{(n+1)!}\displaystyle\prod_{i=0}^n(x-x_i)\).

\begin{definition}
Let $f$ be a function defined at $x_0,\dots,x_n$ and suppose $m_1,\dots,m_k$ are
k distinct integers with $0\le m_i\le n$ for each i. The Lagrange polynomial that
agrees with $f(x)$ at the k points $x_{m_1},\dots,x_{m_k}$ is denoted by
$P_{m_1,\dots,m_k}(x)$
\end{definition}

\begin{theorem}
Let $f$ be defined at $x_0,\dots,x_k$ and let $x_i$ and $x_j$ be two distinct numbers in
this set. Then
\begin{equation*}
P(x)=\frac{(x-x_j)P_{0,1,\dots,j-1,j+1,\dots,k}(x)-(x-x_i)P_{0,\dots,i-1,i+1,\dots,k}(x)}
{x_i-x_j}
\end{equation*}
describes the k-th Lagrange polynomial that interpolates $f$ at the k+1 points
$x_0,\dots,x_k$
\end{theorem}

\textbf{Neville's Method}
\begin{tabular}{c c c c c c}
$x_0$ & $P_0$ &           &             &            \\
$x_1$ & $P_1$ & $P_{0,1}$ &             &            \\
$x_2$ & $P_2$ & $P_{1,2}$ & $P_{0,1,2}$ &            \\
$x_3$ & $P_3$ & $P_{2,3}$ & $P_{1,2,3}$ & $P_{0,1,2,3}$\\
\end{tabular}
\subsection{3.2 Divided differences}
\label{sec:orgcf80581}
\(f[x_i,x_j]=\frac{f(x_i)-f(x_j)}{x_i-x_j}(i\neq j, x_i\neq x_j)\).
\(f[x_i,x_j,x_k]=\frac{f[x_i,x_j]-f[x_j,x_k]}{x_i-x_k}\).
\subsection{Additional Newton Interpolation}
\label{sec:orgf3e1340}
\subsubsection{Simple idea}
\label{sec:orgcefd3e5}
Given \(x_0,\dots,x_n\)
\begin{enumerate}
\item Fitting $x_0$ first: $f(x)\approx f_0, f_0=f(x_0)$
\item Add one more point $x_1$, $f_1=f(x_1)$
\begin{equation*}
f(x) \approx f_0+\alpha_1(x-x_0),\alpha_1=\frac{f_1-f_0}{x_1-x_0}
\end{equation*}
\item More points $f(x)\approx f_0+\alpha_1(x-x_0)+\alpha_2(x-x_0)(x-x_1)$
\end{enumerate}

\textbf{The pattern and coefficients}.
\(f(x)=\displaystyle\sum_{i=0}^n\alpha_i
    \displaystyle\prod_{j=0}^{j<i}(x-x_j)
    =\displaystyle\sum_{i=0}^n\alpha_iN^{(i)}(x)\)

\begin{equation*}
\begin{pmatrix}
f_0\\
f_1\\
\vdots\\
f_n
\end{pmatrix}=
\begin{pmatrix}
N^{(0)}(x_0) & N^{(1)}(x_0) & \dots & N^{(n)}(x_0)\\
N^{(0)}(x_1) & N^{(1)}(x_1) & \dots & N^{(n)}(x_1)\\
\vdots & \vdots & \ddots&\vdots\\
N^{(0)}(x_n) & N^{(1)}(x_n) & \dots & N^{(n)}(x_n)\\
\end{pmatrix}
\begin{pmatrix}
\alpha_0\\
\alpha_1\\
\vdots\\
\alpha_n
\end{pmatrix}
\end{equation*}

\(N^{(i)}(x_k)=\begin{cases}
    0&k<i\\
    \prod_{j=0}^{j<i}(x_k-x_j)&k\ge i\\
    \end{cases}\) with \(N^{(0)}(x) = 1\).
Newton interpolation matrix is lower triangular.
Lagrange matrix is identity.
\subsubsection{Basis transformation}
\label{sec:org63478f1}
\begin{equation*}
\begin{pmatrix}
1\\
(x-x_0)\\
(x-x_0)(x-x_1)\\
\vdots
\end{pmatrix}=(?)
\begin{pmatrix}
1\\
x\\
x^2\\
\vdots
\end{pmatrix}
\end{equation*}
Hence \((\Phi_B)^T=(T_A^B)^T(\Phi_A)^T\).
\(\Phi_B=\Phi_AT_A^B\)

\begin{align*}
(\Phi_A)(\alpha_A)=(f)&=(\Phi_B)(\alpha_B)\\
&=(\Phi_A)(T_A^B)(\alpha_B)\\
&\Rightarrow\\
(\alpha_A)&=(T_A^B)(\alpha_B)\\
(\alpha_B)&=(T_A^B)^{-1}(\alpha_A)\\
&=(T_B^A)(\alpha_A)
\end{align*}
\subsection{3.3 Hermite interpolation}
\label{sec:org37eeda3}
Find the \textbf{osculating polynomial} \(P(x)\) s.t. \(P(x_i)=f(x_i),
   P'(x_i)=f'(x_i),\dots,P^{(m_i)}(x_i)=f^{(m_i)}(x_i)\) for all \(i=0,1,\dots,n\).

Just the Taylor polynomial \(P(x)=f(x_0)+f'(x_0)(x-x_0)+\dots+
   \frac{f^{(m_0)}(x_0)}{m_0!}(x-x_0)^{m_0}\) with remainder 
\(R(x)=f(x)-\varphi(x)=\frac{f^{(m_0+1)}(\xi)}{(m_0+1)!}(x-x_0)^{(m_0+1)}\)

\(m_i = 1\) gives \textbf{Hermite polynomial}

\begin{example}
Suppose $x_0\neq x_1\neq x_2$. Given $f(x_0),f(x_1), f(x_2),
f'(x_1)$ find the polynomial $P(x)$ s.t. $P(x_i)=f(x_i),P'(x_1)=f'(x_1)$ and
analyze the errors.
\end{example}

\begin{proof}
$P_3(x)=\displaystyle\sum_{i=0}^2f(x_i)h_i(x)+f'(x_1)\hat{h}_1(x)$ where
$h_i(x_j)=\delta_{ij},h_i'(x_i)=0,\hat{h}_i(x_i)=0,\hat{h}_i'(x_1)=1$.
\begin{itemize}
\item $h_0(x)$. Has roots $x_1,x_2$ and $x_1$ is a multiple root.
      $h_0(x)=C_0(x-x_1)^2(x-x_2)$ and $h_0(x_0)=1\Longrightarrow C_0$
\item $\hat{h}_1(x)$ has root $x_0,x_1,x_2\Longrightarrow 
      \hat{h}_1(x)=C_1(x-x_0)(x-x_1)(x-x_2)$
\end{itemize}
\end{proof}

In general, given \(x_0,\dots,x_n;y_0,\dots,y_n\) and \(y_0',\dots,y_n'\). The
Hermite polynomial \(H_{2n+1}(x)\) satisfies \(H_{2n+1}(x_i)=y_i\) and
\(H'_{2n+1}(x_i)=y_i'\) 

\emph{Solution}.
\(H_{2n+1}(x)=\displaystyle\sum_{i=0}^ny_ih_i(x)+\displaystyle\sum_{i=0}^ny_i'
   \hat{h}_i(x)\)
\subsection{3.4 Cubic spline interpolation}
\label{sec:orgc05a8b6}
\textbf{Piecewise linear interpolation}. Approximate \(f(x)\) by linear polynomials on
each subinterval \([x_i,x_{i+1}]\).

\(f\approx P_1(x)=\frac{x-x_{i+1}}{x_i-x_{i+1}}y_i+\frac{x-x_i}
   {x_{i+1}-x_i}y_{i+1} \quad\text{for} \;x\in[x_i,x_{i+1}]\) 

Let \(h=\max\abs{x_{i+1}-x_i}\). Then \(P_1^h(x)\xrightarrow{uniform} f(x)\) as
\(h\to 0\) 
However, this is no longer smooth.

\textbf{Hermite piecewise polynomials}. Given
\(x_0,\dots,x_n;y_0,\dots,y_n,y_0',\dots,y_n'\), construct the Hermite
polynomial of degree 3 with \(y\) and \(y'\) on the two endpoints of
\([x_i,x_{i+1}]\)

\textbf{Cubic Spline}.
\begin{definition}
Given a function $f$ define on $[a,b]$ and a set of nodes $a=x_0<x_1<\dots<x_n=b$,
\textcolor{red}{cubic spline interpolant} $S$ for $f$ is a function that satisfies
the following conditions
\begin{itemize}
\item $S(x)$ is a cubic polynomial, denoted by $S_i(x)$ on the subinterval
$[x_i,x_{i+1}]$ for each $i=0,\dots,n-1$
\item $S(x_i)=f(x_i)$ for each $i=0,\dots, n$
\item $S_{i+1}(x_{i+1})=S_i(x_{i+1})$
\item $S'_{i+1}(x_{i+1})=S'_i(x_{i+1})$
\item $S''_{i+1}(x_{i+1})=S''_i(x_{i+1})$
\end{itemize}
\end{definition}

\includegraphics[width=100mm]{CubicSpline}

\textbf{Method of Bending moment}. Let \(h_j=x_j-x_{j-1}\) and \(S(x)=S_j(x)\) for
\(x\in[x_{j-1}, x_j]\). Then \(S_j''\) is a polynomial of degree
\textcolor{red}{1}, which can be determined by the values of f on
\textcolor{red}{2} nodes .

Assume \(S_j''(x_{j-1})=M_{j-1},S_j''(x_j)=M_j\). Then for all
\(x\in[x_{j-1},x_j]\),
\(S_j''(x)=M_{j-1}\frac{x_j-x}{h_j}+M_j\frac{x-x_{j-1}}{h_j}\). Hence we get
\begin{align*}
&S_j'(x)=-M_{j-1}\frac{(x_j-x)^2}{2h_j}+M_j\frac{(x-x_{j-1})^2}{2h_j}+A_j\\
&S_j(x)=M_{j-1}\frac{(x_j-x)^3}{6h_j}+M_j\frac{(x-x_{j-1})^3}{6h_j}+A_jx+B_j
\end{align*}

Solve this by \(S_j(x_{j-1})=y_{j-1},S_j(x_j)=y_j\), we get
\begin{align*}
A_j&=\frac{y_j-y_{j-1}}{h_j}-\frac{M_j-M_{j-1}}{6}h_j\\
A_jx+B_j&=(y_{j-1}-\frac{M_{j-1}}{6}h_j^2)\frac{x_j-x}{h_j}+
(y_j-\frac{M_j}{6}h_j^2)\frac{x-x_{j-1}}{h_j}
\end{align*}

Now solve for \(M_j\): Since \(S'\) is continuous at \(x_j\)
 \begin{align*}
[x_{j-1},x_j]:S'_j(x)&=-M_{j-1}\frac{(x_j-x)^2}{2h_j}+M_j\frac{(x-x_{j-1})^2}{2h_j}
                       +f[x_{j-1},x_j]-\frac{M_j-M_{j-1}}{6}h_j\\
[x_j,x_{j+1}]:S'_{j+1}(x)&=-M_j\frac{(x_{j+1}-x)^2}{2h_{j+1}}+M_{j+1}
\frac{(x-x_j)^2}{2h_{j+1}}+f[x_j,x_{j+1}]-\frac{M_{j+1}-M_j}{6}h_{j+1}\\
\end{align*}
From \(S'_j(x_j)=S'_{j+1}(x_j)\), let \(\lambda_j=\frac{h_{j+1}}{h_j+h_{j+1}},
   \mu_j=1-\lambda_j,g_j=\frac{6}{h_j+h_{j+1}}(f[x_j,x_{j+1}]-f[x_{j-1},x_j])\)
we get
\begin{equation*}
\mu_jM_{j-1}+2M_j+\lambda_jM_{j+1}=g_j\quad\text{for } \;1\le j\le n-1
\end{equation*}
\begin{equation*}
\begin{pmatrix}
\mu_1 & 2 & \lambda_1 &&\\
& \ddots &\ddots &\ddots &\\
&&\mu_{n-1}&2&\lambda_{n-1}
\end{pmatrix}
\begin{pmatrix}
M_0\\
\vdots\\
\vdots\\
M_n\\
\end{pmatrix}=
\begin{pmatrix}
g_1\\
\vdots\\
g_{n-1}
\end{pmatrix}
\end{equation*}

And  \(S'(a)=y_0',S'(b)=y_n'\)

If \(S''(a)=y_0''=M_0,S''(b)=y_n''=M_n\), then \(\lambda_0=0,g_0=2y_0'',\mu_n=0,
   g_n=2y_n''\).

The case when \(M_0=M_n=0\) is called a \textbf{free boundary}, the spline is called
\textbf{natural spline}
\section{chap4 numerical differentiation and integration}
\label{sec:org8a76af3}
\subsection{4.1 numerical differentiation}
\label{sec:org25c9fb7}
\textbf{Target}: Given \(x_0\), approximate \(f'(x_0)\)

\begin{equation*}
f'(x_0)=\lim\limits_{h\to0}\frac{f(x_0+h)-f(x_0)}{h}
\end{equation*}

Approximate \(f(x)\) by its lagrange polynomial with interpolating points \(x_0\)
and \(x_0+h\)

\begin{align*}
f(x)&=\frac{f(x_0)(x-x_0-h)}{x_0-x_0-h}+\frac{f(x_0+h)(x-x_0)}{x_0+h-x_0}\\
&+\frac{(x-x_0)(x-x_0-h)}{2}f''(\xi_x)\\
f'(x)&=\frac{f(x_0+h)-f(x_0)}{h}+\frac{2(x-x_0)-h}{2}f''(\xi_x)\\
&+\frac{(x-x_0)(x-x_0-h)}{2}\frac{d}{dx}[f''(\xi_x)]\\
f'(x_0)&=\frac{f(x_0+h)-f(x_0)}{h}-\frac{h}{2}f''(\xi)
\end{align*}

Approximate \(f(x)\) by its Lagrange polynomial with interpolating points
\(\{x_0,x_1,\dots,x_n\}\)

\begin{align*}
f(x)&=\displaystyle\sum_{k=0}^nf(x_k)L_k(x)+\frac{(x-x_0)\dots(x-x_n)}{(n+1)!}
f^{(n+1)}(\xi_x)\\
f'(x_j)&=\displaystyle\sum_{k=0}^nf(x_k)L_k'(x_j)+\frac{f^{(n+1)}(\xi_j)}{(n+1)!}
\displaystyle\prod_{\substack{k=0\\k\neq j}}^n(x_j-x_k)
\end{align*}
\subsection{4.3 elements of numerical integration}
\label{sec:orgcf85682}
\textbf{Target}: approximate \(I=\int_a^bf(x)dx\)

Integrate the \textbf{Lagrange interpolating polynomial} of \(f(x)\) instead

Select a set of distinct nodes \(a\le x_0<x_1<\dots<x_n\le b\) from \([a,b]\).
The Lagrange polynomial is \(P_n(x)=\displaystyle\sum_{k=0}^nf(x_k)L_k(x)\)

\begin{equation*}
\int_a^bf(x)dx\approx \displaystyle\sum_{k=0}^nf(x_k)
\overbrace{\int_a^b L_k(x)dx}^{A_k}
\end{equation*}

Error
\begin{align*}
R[f]&=\int_a^bf(x)dx-\displaystyle\sum_{k=0}^nA_kf(x_k)\\
&=\int_a^b[f(x)-P_n(x)]dx=\int_a^bR_n(x)dx\\
&=\int_a^b\frac{f^{(n+1)}(\xi_x)}{(n+1)!}\displaystyle\prod_{i=0}^n(x-x_i)dx
\end{align*}

\begin{definition}
The \textcolor{red}{degree of accuracy}, or \textcolor{red}{precision} of a quadrature
formula is the largest positive integer \textcolor{red}{$n$}   s.t. 
the formula is \textcolor{red}{exact}
for $x^k$ for each $k=0,1,\dots,n$
\end{definition}

Example. Consider the linear interpolation on \([a,b]\), we have 
\begin{equation*}
P_1(x)=\frac{x-b}{a-b}f(a)+\frac{x-a}{b-a}f(b)
\end{equation*}
\(A_0=A_1=\frac{b-a}{2}, \int_a^bf(x)dx\approx\frac{b-a}{2}[f(a)+f(b)]\). This
is the \textcolor{red}{trapezoidal rule}.

Consider \(x^k\)
\begin{align*}
1:\quad &\int_a^b1dx=b-a \textcolor{red}{=} \frac{b-a}{2}[1+1]\\
x:\quad &\int_a^bx\,dx=\frac{b^2-a^2}{2} \textcolor{red}{=}\frac{b-a}{2}[a+b]\\
x^2:\quad &\int_a^bx^2\,dx=\frac{b^3-a^3}{3} \textcolor{red}{\neq}  \frac{b-a}{2}[a^2+b^2]
\end{align*}

For equally spaced nodes: \(x_i=a+ih,h=\frac{b-a}{n}, i=0,1,\dots,n\)

\begin{align*}
A_i&=\int_{x_0}^{x_n}\displaystyle\prod_{j\neq i}\frac{x-x_j}{x_i-x_j}dx\\
&=\int_0^n\displaystyle\prod_{j\neq i}\frac{(t-j)h}{(i-j)h}\times hdt\quad x=a+th\\
&=\frac{(b-a)(-1)^{n-i}}{n\;i!(n-i)!}\int_0^n\displaystyle\prod_{j\neq i}(t-j)dt
\end{align*}

\(\frac{(-1)^{n-i}}{n\;i!(n-i)!}\int_0^n\displaystyle\prod_{j\neq i}(t-j)dt\)
is the \textbf{Cotes coefficient}

\subsection{4.4 composite numerical integration}
\label{sec:orgc20a08d}
Due to the oscillatory nature of high-degree polynomials, \textbf{piecewise}
interpolation is applied to approximate \(f(x)\). A piecewise approach that
uses the low-order Newton-Cotes formulae


Composite Trapezoidal rule: \(h=\frac{b-a}{n}, x_k=a+kh\).

Apply Trapezoidal Rule on each \([x_{k-1}, x_k]\)
\begin{tikzpicture}
\filldraw [gray] (0,0) circle [radius=2pt]
(-1.5,0) circle [radius=2pt]
(0,0.5) circle [radius=2pt]
(-0.75,0) circle [radius=2pt]
(-0.75,0.5) circle [radius=2pt]
(0.75,0.5) circle [radius=2pt]
(1.5,0) circle [radius=2pt]
(0.75,0) circle [radius=2pt];
\draw (-1.5,0) -- (1.5,0);
\end{tikzpicture}

\begin{equation*}
\int_{x_{k-1}}^{x_k}f(x)dx\approx \frac{x_k-x_{k-1}}{2}[f(x_{k-1})+f(x_k)]
\end{equation*}
\begin{equation*}
\int_a^bf(x)dx\approx \displaystyle\sum_{k=1}^n\frac{h}{2}[f(x_{k-1})+f(x_k)]=
\frac{h}{2}\left[f(a)+2 \displaystyle\sum_{k=1}^{n-1}f(x_k)+f(b)\right]=
\textcolor{red}{T_n} 
\end{equation*}
\begin{equation*}
R[f]=\displaystyle\sum_{k=1}^n\left[-\frac{h^3}{12}f''(\xi_k)\right]=-\frac{h^2}
{12}(b-a)\frac{\displaystyle\sum_{k=1}^nf''(\xi_k)}{n}=
-\frac{h^2}{12}(b-a)f''(\xi),\xi\in(a,b)
\end{equation*}


\textbf{Composite simpson's rule}
\begin{equation*}
\int_{x_k}^{x_{k+1}}f(x)dx\approx\frac{h}{6}\left[f(x_k)+4f(x_{k+1/2})+f(x_{k+1})\right]
\end{equation*}
In fact, it's just a mean value \((f(x_k)+4f(x_{k+1/2})+f(x_{k+1})) / 6\)

\subsection{4.5 Romberg integration}
\label{sec:org78a9b00}
\begin{align*}
R_n[f]&=-\frac{h^2}{12}(b-a)f''(\xi)\\
R_{2n}[f]&=-\frac{h^2/4}{12}(b-a)f''(\xi')\approx \frac{1}{4}R_n[f]\\
\end{align*}
Hence we have
\begin{equation*}
\frac{I-T_{2n}}{I-T_n}\approx\frac{1}{4}
\end{equation*}
and \(I\approx \frac{4}{3}T_{2n}-\frac{1}{3}T_n=\textcolor{red}{S_n}\)
.\(\frac{4^2S_{2n}-S_n}{4^2-1}=C_n\), \(\frac{4^3C_{2n}-C_n}{4^3-1}=R_n\), the
\textbf{Romberg sequence}

\subsection{4.2 Richardson's Extrapolation}
\label{sec:orgc722b7c}
generate high-accuracy results while using low-order formulae

For some \(h\neq 0\), suppose we have \(T_0(h)\) that approximates an unknown
\(I\), and
\begin{align*}
T_0(h)-I&=\alpha_1 h+\alpha_2h^2+\dots\\
T_0(h/2)-I&=\alpha_1(h/2)+\alpha_2(h/2)^2+\dots
\end{align*}
Hence can improve accuracy by substituting

\subsection{4.6 Adaptive quadrature methods}
\label{sec:org6e31d6c}
Predict the amount of functional variation and adapt the step size to the
varying requirement

using the composite integration
\begin{itemize}
\item recursively halve the step size
\item waste large number of computations
\item only need to halve the interval with large error
\item THIS is \textbf{adaptive}
\end{itemize}


A simple strategy to bound the total error by \(\epsilon\) of
\begin{equation*}
\int_a^bf(x)dx
\end{equation*}
In an interval with length \(h\), the error is smaller than
\(h\frac{\epsilon}{b-a}\)


\begin{equation*}
\epsilon(f,a,b)=\int_a^bf(x)dx-S(a,b)=-\frac{h^5}{90}f^{(4)}(\xi),\quad h=\frac{b-a}{2}
\end{equation*}

\subsection{4.7 Gaussian Quadrature}
\label{sec:org3cfdcbd}
Construct formula 
\begin{equation*}
\int_a^bw(x)f(x)dx\approx \displaystyle\sum_{k=0}^nA_kf(x_k)
\end{equation*}
of precision degree \textcolor{red}{2n+1} with n+1 points

\begin{theorem}
$x_0,\dots,x_n$ are Gaussian points iff $W(x)=\displaystyle\prod_{k=0}^n(x-x_k)$
is orthogonal to all the polynomials of degree no greater than $n$
\end{theorem}
\begin{proof}
\begin{enumerate}
\item If $x_0,\dots,x_n$ are Gaussian points, then the degree of precision of
the formula $\int_a^bw(x)f(x)\approx \displaystyle\sum_{k=0}^nA_kf(x_k)$ is
at least $2n+1$.

For any polynomial $P_m(x)$ with $m\le n$, the degree of $P_m(x)W(x)$ is no greater
than $2n+1$. Hence
\begin{equation*}
\int_a^bw(x)P_m(x)W(x)dx=\displaystyle\sum_{k=0}^nA_kP_m(x_k)W(x_k)=0
\end{equation*}

\item Let $P_m(x)=W(x)q(x)+r(x)$. Then
\begin{align*}
\int_a^bw(x)P_m(x)dx&=\int_a^bw(x)W(x)q(x)dx+\int_a^bw(x)r(x)dx=
\displaystyle\sum_{k=0}^nA_kr(x_k)\\&=\displaystyle\sum_{k=0}^nA_kP_m(x_k)
\end{align*}
Since $r(x)$'s degree is less than $n+1$, it is reproduced exactly by interpolation at the $n+1$ points

\end{enumerate}
\end{proof}

Suppose \(\{\varphi_0,\dots,\varphi_n,\dots\}\) are linearly independent and
\(\varphi_{n+1}\) is orthogonal to any polynomial \(P_m(x)\) with \(m\le n\). If we
take \(\varphi_{n+1}\) to be \(W(x)\), then the \textcolor{red}{roots of $\varphi_{n+1}$}
are the Gaussian points
\section{chap5 Initial-value problems for ordinary differential equations}
\label{sec:orgbd1f09b}
\subsection{5.1 the elementary theory of initial-value problems}
\label{sec:org4b67aaa}
\begin{equation*}
\begin{cases}
\frac{dy}{dt}=f(t,y)&t\in[a,b]\\
y(a)=\alpha\\
\end{cases}
\end{equation*}

Compute the approximation of \(y(t)\) at a set of mesh points 
\(a=t_0<t_1<\dots<t_n=b\)

\begin{definition}
A function $f(t,y)$ is said to satisfy a \textcolor{red}{Lipschitz condition} in
the variable $y$ on a set $D\subset R^2$ if a constant $L>0$ exists with
\begin{equation*}
\abs{f(t,y_1)-f(t,y_2)}\le L\abs{y_1-y_2}
\end{equation*}
whenever $(t,y_1),(t,y_2)\in D$. The constant L is called a
\textcolor{red}{Lipschitz constant}
\end{definition}

\begin{theorem}
Suppose that $D=\{(t,y)|a\le t\le b, -\infty<y<\infty\}$ and that $f(t,y)$ is continuous
on D. If $f$ satisfies a Lipschitz condition on $D$ in the variable y, then the IVP
\begin{equation*}
y'(t)=f(t,y), a\le t\le b, y(a) = \alpha
\end{equation*}
has a \textbf{unique solution} $y(t)$
\end{theorem}


\begin{definition}
The initial-value problem
\begin{equation*}
y'(t)=f(t,y), a\le t\le b, y(a)=\alpha
\end{equation*}
is said to be a \textcolor{red}{well-posed problem} if:
\begin{enumerate}
\item A unique solution $y(t)$ to the problem
\item For any $\epsilon>0$, there exists a positive constant $k(\epsilon)$ s.t.
whenever $\abs{\epsilon_0}<\epsilon$, and $\delta(t)$ is continuous with 
$\abs{\delta(t)}<\epsilon$ on [a,b], a unique solution $z(t)$ 
\begin{equation*}
z'(t)=f(t,z)+\delta(t), a\le t\le b, z(a)=\alpha+\epsilon_0
\end{equation*}
exists with $\abs{z(t)-y(t)}<k(\epsilon)\epsilon$, for all $a\le t\le b$
\end{enumerate}
\end{definition}

\begin{theorem}
Suppose that $D=\{(t,y)|a\le t\le b,-\infty<y<\infty\}$ and that $f(t,y)$ is continuous on $D$.
If $f$ satisfies a Lipschitz condition on $D$ in the variable $y$, then the IVP is well-posed
\end{theorem}
\subsection{5.2 Euler's Method}
\label{sec:org6d9f2bd}
\begin{align*}
y'(t_0)&\approx\frac{y(t_0+h)-y(t_0)}{h}\\
y(t_1)&\approx y(t_0)+hy'(t_0)=\alpha+hf(t_0,\alpha)
\end{align*}
\subsection{5.3 Higher Order Taylor Methods}
\label{sec:orgfef7335}
\begin{definition}
The difference method
\begin{equation*}
w_0=\alpha\quad w_{i+1}=w_i+h\phi(t_i,w_i), \text{for each } i = 0,1,\dots, n-1
\end{equation*}
has \textcolor{red}{local truncation error}
\begin{equation*}
\tau_{i+1}(h)=\frac{y_{i+1}-(y_i+h\phi(t_i,y_i))}{h}=\frac{y_{i+1}-y_i}{h}-\phi(t_i,y_i)
\end{equation*}
\end{definition}

\begin{equation*}
y_{i+1}=y_i+hf(t_i,y_i)+\frac{h^2}{2}f'(t_i,y_i)+\dots+\frac{h^n}{n!}f^{(n-1)}(t_i,y_i)
+ \frac{h^{n+1}}{(n+1)!}f^{(n)}(\xi_i, y(\xi_i))
\end{equation*}

\begin{align*}
&w_0=\alpha\\
&w_{i+1}=w_i+hT^{(n)}(t_i,w_i)\\
&\text{where } T^{(n)}(t_i,w_i)=f(t_i,w_i)+\frac{h}{2}f'(t_i,w_i)+\dots+
\frac{h^{n-1}}{n!}f^{(n-1)}(t_i,w_i)
\end{align*}
\section{Chap6 Direct Methods for Solving Linear Systems}
\label{sec:org9c9ea18}
\subsection{6.1 Linear Systems of Equations}
\label{sec:org98f2af5}
\textbf{Gaussian elimination with backward substitution}
\subsection{6.2 Pivoting Strategies}
\label{sec:org6d1825d}
\textbf{Problem}: small pivot element may cause trouble

\textbf{Partial Pivoting}: Determine the smallest \(p\ge k\) s.t.
\(|a_{pk}^{(k)}|=\displaystyle\max_{k\le i\le n}|a_{ik}^{(k)}|\) and
interchange the pth and the kth rows

\textbf{Scaled Partial Pivoting}:
\begin{enumerate}
\item Define a scale factor \(s_i\) for each row as \(s_i=\displaystyle\max_{1\le
      j\le n}|a_{ij}|\)
\item Determine the smallest \(p\ge k\) s.t.
\(\frac{|a_{pk}^{(k)}|}{s_p}=\displaystyle\max_{k\le i\le
      n}\frac{|a_{ik}^{(k)}|}{s_i}\)
and interchange the pth and the kth rows
\end{enumerate}


\textbf{Complete Pivoting}: Search all the entries \(a_{ij}\) to find the entry with
the largest magnitude
\subsection{6.5 Matrix Factorization}
\label{sec:org982232e}
\(m_{ik}=a_{ik}/a_{kk}\)
\begin{equation*}
L_k=
\begin{pmatrix}
1 &            &            &               &  \\
  & \ddots     &            &\mbox{\Huge 0} &  \\
  &            & 1          &               &  \\
  &            & -m_{k+1,k} &               &  \\
  &            & \vdots     & \ddots        &  \\
  &            & -m_{n,k}   &               & 1\\
\end{pmatrix}
\end{equation*}  


Hence 

\begin{equation*}
L_1^{-1}L_2^{-1}\dots L_{n-1}^{-1}=
\begin{pmatrix}
1&&&\mbox{\Huge 0}\\
&1&&\\
&&\ddots&\\
\text{\Huge $m_{i,j}$}&&&1\\
\end{pmatrix}
\end{equation*}

\begin{equation*}
U=
\begin{pmatrix}
a_{11}&a_{12}&\dots&a_{1n}\\
&a_{22}&\dots&a_{2n}\\
&&\dots&\vdots\\
&&&a_{nn}\\
\end{pmatrix}
\end{equation*}

\(A=LU\)
\subsection{6.6 Special Types of Matrices}
\label{sec:org43f6c43}
\textbf{Strictly Diagonally Dominant Matrix}.
\(|a_{ii}|>\displaystyle\sum_{\substack{j=1,\\j\neq i}}^n|a_{ij}| \quad
   \text{for each } i=1,\dots,n\)

\begin{theorem}
A strictly diagonally dominant matrix A is \textcolor{red}{nonsingular}. Moreover,
Gaussian elimination can be performed \textcolor{red}{without} row or column
\textcolor{red}{interchanges}, and the computations will be \textcolor{red}{stable}
w.r.t. the growth of roundoff errors
\end{theorem}

\textbf{Choleski's Method for Positive Definite Matrix}:
\begin{definition}
A matrix A is \textcolor{red}{positive definite} if it's symmetric and if
$ \mathbf{x}^T \mathbf{A} \mathbf{x}>0$ for every n-dimensional vector $ \mathbf{x}\neq 0$
\end{definition}

\begin{lemma}
A is positive definite
\begin{enumerate}
\item $A^{-1}$ is positive definite as well, and $a_{ii}>0$
\item $\max_{i\neq j}|a_{ij}|\le\max_k|a_{kk}|$; $(a_{ij})^2<a_{ii}a_{jj}$ for each $i\neq j$
\item Each of $A$'s leading principal submatrices $A_k$ has a positive determinant
\end{enumerate}
\end{lemma}

\begin{equation*}
U =
\begin{pmatrix}
&u_{ij}\\
&&\\
\end{pmatrix}=
\begin{pmatrix}
u_{11}&&\\
&\ddots&\\
&&u_{nn}\\
\end{pmatrix}
\begin{pmatrix}
1&&u_{ij}/u_{ii}\\
&1&\\
&&1\\
\end{pmatrix}=D\tilde{U}
\end{equation*}
A is symmetric, hence 
\begin{equation*}
L=\tilde{U}^t, A=LDL^t
\end{equation*}
Let 
\begin{equation*}
D^{1/2}=
\begin{pmatrix}
\sqrt{u_{11}}&&\\
&\ddots&\\
&&\sqrt{u_{nn}}\\
\end{pmatrix}, \tilde{L}=LD^{1/2}, A=\tilde{L}\tilde{L}^t
\end{equation*}

\textbf{Crout Reduction for tridiagonal Linear System}

\begin{equation*}
\begin{pmatrix}
b_1 & c_1    &        &        &\\
a_2 & b_2    & c_2    &        &\\
    & \ddots & \ddots & \ddots &\\
    &        & a_{n-1}& b_{n-1}& c_{n-1} \\
    &        &        & a_n    & b_n\\
\end{pmatrix}
\begin{pmatrix}
x_1\\
x_2\\
\vdots\\
x_{n-1}\\
x_n
\end{pmatrix}=
\begin{pmatrix}
f_1\\
f_2\\
\vdots\\
f_{n-1}\\
f_n
\end{pmatrix}
\end{equation*}

\begin{equation*}
A=
\begin{pmatrix}
\alpha_1 &&&\\
\gamma_2 & \ddots &&\\
         & \ddots & \ddots   &\\
         &        & \gamma_n & \alpha_n\\
\end{pmatrix}
\begin{pmatrix}
1 & \beta_1 &&\\
  & \ddots & \ddots &\\
  &        & \ddots & \beta_{n-1}\\
  &        &        & 1\\
\end{pmatrix}
\end{equation*}  
\section{Chap7 Iterative techniques in Matrix algebra}
\label{sec:orgd0430db}
\subsection{7.1 Norms of vectors and matrices}
\label{sec:org11f4726}
\begin{definition}
A \textcolor{red}{vector norm} on $R^n$ is a function $||\cdot||: \mathbb{R}^n\to \mathbb{R}$
with following properties for all $ \mathbf{x,y}\in \mathbb{R}^n, \alpha\in C$
\begin{enumerate}
\item $|| \mathbf{x}||\ge 0$; $|| \mathbf{x}||=0\Longleftrightarrow \mathbf{x}= \mathbf{0}$
\item $||\alpha \mathbf{x}||=|\alpha|\cdot|| \mathbf{x}||$
\item $|| \mathbf{x}+ \mathbf{y}||\le|| \mathbf{x}||+|| \mathbf{y}||$
\end{enumerate}
\end{definition}

\(|| \mathbf{x}||_1=\displaystyle\sum_{i=1}^n|x_i|\).
\(||\mathbf{x}||_p=(\displaystyle\sum_{i=1}^n|x_i|^p)^{1/p}\)

\begin{definition}
A sequence $\{\mathbf{x}^{(k)}\}_{k=1}^\infty$ of vectors in $R^n$ 
\textcolor{red}{converge to} $\mathbf{x}$ w.r.t the norm $||\cdot||$ if
given any $\epsilon>0$ there exists an integer $N(\epsilon)$ s.t.
$||\mathbf{x}^{(k)}-\mathbf{x}||<\epsilon$ for all $k\ge N(\epsilon)$
\end{definition}

\begin{theorem}
The sequence of vectors $\{\mathbf{x}^{(k)}\}$ converges to $ \mathbf{x}\in R^n$
w.r.t. $||\cdot||$ if and only if $ \lim\limits_{k\to\infty}\mathbf{x}^{(k)}_i=x_i$
for each $i=1,2,\dots,n$
\end{theorem}

\begin{definition}
If there exist positive constants $C_1,C_2$ s.t. $C_1||\mathbf{x}||_B\le||\mathbf{x}||_A
\le C_2||\mathbf{x}||_B$. Then $||\cdot||_A,||\cdot||_B$ are \textcolor{red}{equivalent} 
\end{definition}

\begin{theorem}
All vector norms on $\mathbb{R}^n$ are equivalent
\end{theorem}


\begin{definition}
A \textcolor{red}{matrix norm} on the set of $n\times n$ matrices is a real-valued function satisfying:
\begin{enumerate}
\item $||\mathbf{A}||\ge0;||\mathbf{A}||=0\Longleftrightarrow \mathbf{A}=\mathbf{0}$
\item $||\alpha \mathbf{A}||=|\alpha|\cdot||\mathbf{A}||$
\item $||\mathbf{A}+\mathbf{B}||\le||\mathbf{A}||+||\mathbf{B}||$
\item $||\mathbf{AB}||\le||\mathbf{A}||\cdot||\mathbf{B}||$
\end{enumerate}
\end{definition}

\textbf{Frobenius Norm}: \(||\mathbf{A}||_F=\sqrt{\displaystyle\sum_{i=1}^n
   \displaystyle\sum_{j=1}^n|a_{ij}|^2}\)

\textbf{Natural Norm}: \(||\mathbf{A}||_p=\displaystyle\max_{\mathbf{x}\neq
   \mathbf{0}}\frac{||\mathbf{Ax}||_p}{||\mathbf{x}||_p}=\displaystyle\max_{\mathbf{z}\neq
   \mathbf{0}}||\mathbf{A}\frac{\mathbf{z}}{||\mathbf{z}||}||=\displaystyle\max_{||\mathbf{x}||_p=1}||\mathbf{Ax}||_p\)

\(||\mathbf{A}||_\infty=\displaystyle\max_{1\le i\le n}\displaystyle\sum_{j=1}^n|a_{ij}|\),
\(||\mathbf{A}||_1=\displaystyle\max_{1\le j\le n}\displaystyle\sum_{i=1}^n|a_{ij}|\),
\(||\mathbf{A}||_2=\sqrt{\lambda_\text{max}(\mathbf{A}^T \mathbf{A})}\)
\subsection{7.2 Eigenvalues and Eigenvectors}
\label{sec:orga98dc0e}
\textbf{spectral radius}.
\begin{definition}
The \textcolor{red}{spectral radius $\rho(A)$} of a matrix A is defined as
$\rho(A)=\max|\lambda|$ where $\lambda$ is an eigenvalue of A
\end{definition}

\begin{theorem}
If A is an $n\times n$ matrix, then $\rho(A)\le||A||$ for any natural norm
\end{theorem}

\begin{proof}
$|\lambda|\cdot||\bl{x}||=||\lambda\bl{x}||=||A\bl{x}||\le||A||\cdot||\bl{x}||$
\end{proof}

\begin{definition}
We call an $n\times n$ matrix A \textcolor{red}{convergent} if for all $i,j=1,\dots,n$
$\lim\limits_{k\to\infty}(A^k)_{ij}=0$
\end{definition}
\subsection{7.3 Iterative techniques for solving linear systems}
\label{sec:org6d99fd7}
\textbf{Jacobi iterative method}.
\begin{equation*}
\begin{cases}
a_{11}x_1+a_{12}x_2+\dots+a_{1n}x_n=b_1\\
a_{21}x_1+a_{22}x_2+\dots+a_{2n}x_n=b_2\\
\dots\\
a_{n1}x_1+a_{n2}x_2+\dots+a_{nn}x_n=b_n\\
\end{cases}\Longrightarrow
\begin{cases}
x_1=\frac{1}{a_{11}}(-a_{12}x_2-\dots-a_{1n}x_n+b_1)\\
x_2=\frac{1}{a_{22}}(-a_{21}x_1-\dots-a_{2n}x_n+b_2)\\
\dots\\
x_n=\frac{1}{a_{nn}}(-a_{n1}x_1-\dots-a_{n,n-1}x_{n-1}+b_n)\\
\end{cases}
\end{equation*}
In matrix form, 
\begin{equation*}
A=
\begin{pmatrix}
D&-U&-U\\
-L&D&-U\\
-L&-L&D
\end{pmatrix}
\end{equation*}
\begin{align*}
A\bl{x}=\bl{b}&\Leftrightarrow(D-L-U)\bl{x}=\bl{b}\\
&\Leftrightarrow D\bl{x}=(L+U)\bl{x}+\bl{b}\\
&\Leftrightarrow \bl{x}=\underbrace{D^{-1}(L+U)}_{T_j}\bl{x}+\underbrace{D^{-1}\bl{b}}_{\bl{c}_j}
\end{align*}
$T_j$ is Jacobi iterative matrix. $\bl{x}^{(k)}=T_j\bl{x}^{(k-1)}+\bl{c}_j$


\textbf{Gauss-Seidel iterative method}
\begin{align*}
&\bl{x}^{(k)}=D^{-1}(L\bl{x}^{(k)}+U\bl{x}^{(k-1)})+D^{-1}\bl{b}\\
\Leftrightarrow&(D-L)\bl{x}^{(k)}=U\bl{x}^{(k-1)}+\bl{b}\\
\Leftrightarrow&\bl{x}^{(k)}=\underbrace{(D-L)^{-1}U}_{T_g}\bl{x}^{(k-1)}
+\underbrace{(D-L)^{-1}\bl{b}}_{\bl{c}_g}
\end{align*}


\textbf{convergence of iterative methods}
\begin{theorem}
the following are equivalent:
\begin{enumerate}
\item A is a convergent matrix
\item $\lim\limits_{n\to\infty}||A^n|| = 0$ for some natural norm
\item $\lim\limits_{n\to\infty}||A^n||=0$ for all natural norms
\item $\rho(A)<1$
\item $\lim\limits_{n\to\infty}A^n\bl{x}=\bl{0}$ for every $\bl{x}$
\end{enumerate}
\end{theorem}

\(\bl{e}^{(k)}=\bl{x}^{(k)}-\bl{x}^*=(T\bl{x}^{(k-1)}+\bl{c})-(T\bl{x}^*+\bl{c})
   =T(\bl{x}^{(k-1)}-\bl{x}^*)=T\bl{e}^{(k-1)}\Rightarrow\bl{e}^{(k)}=T^k\bl{e}^{(0)}\).
\(||\bl{e}^{(k)}||\le||T||\cdot||\bl{e}^{(k-1)}||\le\dots\le||T||^k\cdot||\bl{e}^{(0)}||\)

\begin{theorem}
For any $\bl{x}^{(0)}\in R^n$, the sequence $\{\bl{x}^{(k)}\}_{k=0}^\infty$
defined by $\bl{x}^{(k)}=T\bl{x}^{(k-1)}+\bl{c}$ for each k, converges to the
unique solution of $\bl{x}=T\bl{x}+\bl{c}$ if and only if $\rho(T)<1$
\end{theorem}
\(\rho(T)<1\Longrightarrow(I-T)^{-1}=\displaystyle\sum_{j=0}^\infty T^j\)

\begin{theorem}
If $\norm{T}<1$ for any natural matrix norm and $\bl{c}$ is a given vector, then the
sequence $\{\bl{x}^{(k)}\}_{k=0}^\infty$ defined by $\bl{x}^{(k)}=T\bl{x}^{(k-1)}+\bl{c}$
converges for any $\bl{x}^{(0)}\in R^n$ to a vector $\bl{x}$. And the following
error bounds hold
\begin{enumerate}
\item $\norm{\bl{x}-\bl{x}^{(k)}}\le\norm{T}^k\norm{\bl{x}-\bl{x}^{(0)}}$
\item $\norm{\bl{x}-\bl{x}^{(k)}}\le\frac{\norm{T}^k}{1-\norm{T}}\norm{\bl{x}^{(1)}
-\bl{x}^{(0)}}$
\end{enumerate}
\end{theorem}

\begin{theorem}
If A is a strictly diagonally dominant, then for any choice of $\bl{x}^{(0)}$, both the
Jacobi and Gauss-Seidel methods give sequences $\{\bl{x}^{(k)}\}_{k=0}^\infty$
that converges to the unique solution
\end{theorem}

\textbf{relaxation methods}.
\(x_i^{(k)}=\frac{1}{a_{ii}}(b_i-\displaystyle\sum_{j=1}^{i-1}a_{ij}x_j^{(k)}-
   \displaystyle\sum_{j=i+1}^na_{ij}x_j^{(k-1)})=x_i^{(k-1)}+\frac{r_i^{(k)}}{a_{ii}}\)
and relaxation method is
\(x_i^{(k)}=x_i^{(k-1)}+\omega\frac{r_i^{(k)}}{a_{ii}}\)

\begin{theorem}{(Kahan)}
If $a_{ii}\neq 0$ for each i. Then $\rho(T_\omega)\ge\abs{\omega-1}$.
\end{theorem}
This implies the SOR method can converge only if \(0<\omega<2\)

\begin{theorem}{(Ostrowski-Reich)}
If A is positive definite and $0<\omega<2$, the SOR converges
\end{theorem}

\begin{theorem}
If A is positive definite and tridiagonal, then $\rho(T_g)=(\rho(T_j))^2<1$, and
the optimal choice of $\omega$ for the SOR method is
$\omega=\frac{2}{1+\sqrt{1-(\rho(T_j))^2}}$. With this choice of $\omega$, we
have $\rho(T_\omega)=\omega-1$
\end{theorem}

\subsection{7.4 Error bounds and iterative refinement}
\label{sec:org62ce09c}
Assume that A is accurate and \(\bl{b}\) has the error \(\delta \bl{b}\),
then \(\bl{A}(\bl{x}+\delta \bl{x})=\bl{b}+\delta \bl{b}\)

\begin{theorem}
Suppose $\tilde{\bl{x}}$ is an approximation to the solution of $ \bl{Ax=b}$
A is nonsingular matrix. Then for any natural norm,
\begin{equation*}
||\bl{x-\tilde{x}}||\le||\bl{r}||\cdot||A^{-1}||
\end{equation*}
and if $ \bl{x\neq 0, b\neq 0}$,
\begin{equation*}
\frac{||\delta\bl{x}||}{||\bl{x}||}\le||\bl{A}
||\cdot||\bl{A}^{-1}||\cdot \frac{||\delta\bl{b}||}{||\bl{b}||}
\end{equation*}
\end{theorem}

\begin{proof}
$\bl{r=b-A\tilde{x}}=A\bl{x}-A\tilde{\bl{x}}$ and A is nonsingular. Hence 
$\bl{x-\tilde{x}}=A^{-1}\bl{r}$. Since $\frac{||A^{-1}\bl{r}||}{||\bl{r}||}\le||A^{-1}||$
, $||\bl{x-\tilde{x}}||=||A^{-1}\bl{r}||\le||A^{-1}||\cdot||\bl{r}||$. Also
$||\bl{b}||\le||A||\cdot||\bl{x}||$. So $1/||\bl{x}||\le||A||/||\bl{b}||$
\end{proof}

\begin{theorem}
If a matrix B satisfies $||B||<1$ for some natural norm, then
\begin{enumerate}
\item $I\pm B$ is nonsingular
\item $||(I\pm B)^{-1}||\le \frac{1}{1-||B||}$
\end{enumerate}
\end{theorem}

Assume \(\bl{b}\) is accurate, A has the error \(\delta A\), then
\((A+\delta A)(\bl{x}+\delta\bl{x})=\bl{b}\). Hence
\(\frac{||\delta\bl{x}||}{||\bl{x}||}\le \frac{||A^{-1}||\cdot||\delta
   A||}{1-||A^{-1}||\cdot||\delta A||}=\frac{||A||\cdot||A^{-1}||}{1
   -||A||\cdot||A^{-1}||\cdot \frac{||\delta A||}{||A||}}\)

\textbf{condition number K(A)} is \(||A||\cdot||A^{-1}||\)

\begin{theorem}
Suppose A is nonsingular and $||\delta A||< \frac{1}{||A^{-1}||}$. The solution
$\bl{x}+\delta\bl{x}$ to $(A+\delta A)(\bl{x}+\delta\bl{x})=\bl{b}$ approximates the solution
$\bl{x}$ of $A\bl{x}=\bl{b}$ with the error estimate
\begin{equation*}
\frac{||\delta\bl{x}||}{||\bl{x}||}\le \frac{K(A)}{1-K(A)||\delta A||/||A||}
(\frac{||\delta A||}{||A||}+ \frac{||\delta\bl{b}||}{||\bl{b}||})
\end{equation*}
\end{theorem}

note:
\begin{enumerate}
\item If A is symmetric, then \(K(A)_2= \frac{\max|\lambda|}{\min|\lambda|}\)
\item \(K(A)_p\ge1\) for all natural norm
\item \(K(\alpha A)=K(A)\) for any \(\alpha\in R\)
\item \(K(A)_2=1\) if A is orthogonal
\item \(K(RA)_2=K(AR)_2=K(A)_2\) for every orthogonal matrix R
\end{enumerate}


\textbf{iterative refinement}:
\begin{theorem}
Suppose $\bl{x}^*$ is an approximation to the solution of $A\bl{x}=\bl{b}$, A is
nonsingular matrix and $\bl{r}=\bl{b}-A\bl{x}^*$. Then for any natural norm,
$||\bl{x-x^*}||\le||\bl{r}||\cdot||A^{-1}||$, and if $\bl{x,b}\neq\bl{0}$
\begin{equation*}
\frac{||\bl{x}-\bl{x}^*||}{||\bl{x}||}\le K(A)\frac{||\bl{r}||}{||\bl{b}||}
\end{equation*}
\end{theorem}

\textbf{refinement}
\begin{enumerate}
\item \(A\bl{x}=\bl{b}\) => approximation \(\bl{x}_1\)
\item \(\bl{r}_1=\bl{b}-A\bl{x}_1\)
\item \(A\bl{d}_1=\bl{r}_1\) => \(\bl{d}_1\)
\item \(\bl{x}_2=\bl{x}_1+\bl{d}_1\)
\end{enumerate}
\section{Chap8 Approximation theory}
\label{sec:org2d30a34}
Given \(x_1\dots x_m\) and \(y_1\dots y_m\) find a \textbf{simpler} function \(P(x)\approx
  f(x)\)
\subsection{8.1 Discrete least squares approximation}
\label{sec:orgf787dc6}
Determine the polynomial \(P_n(x)=a_0+a_1x+\dots+a_nx^n\) to approximate the
data \(\{(x_i,y_i)\mid i=1,2,\dots,m\}\) s.t. the least squares error
\(E_2=\displaystyle\sum_{i=1}^m(P_n(x_i)-y_i)^2\) is minimized. Here \(n\ll m\)

\(E_2(a_0,\dots,a_n)=\displaystyle\sum_{i=1}^m(a_0+a_1x_i+\dots+a_nx^n_i-y_i)^2\)

For \(E_2\) to be minimized it's necessary that \(\frac{\partial E_2}{\partial
   a_k}=0\)

\begin{align*}
0&=\frac{\partial E_2}{\partial a_k}=
2 \displaystyle\sum_{i=1}^m(P_n(x_i)-y_i)\frac{\partial P_n(x_i)}{\partial a_k}\\
&=2 \displaystyle\sum_{i=1}^m(\displaystyle\sum_{j=0}^na_jx_i^j-y_i)x_i^k\\
&=2(\displaystyle\sum_{j=0}^na_j(\displaystyle\sum_{i=1}^mx_i^{j+k})-
\displaystyle\sum_{i=1}^my_ix_i^k)
\end{align*}

Let \(b_k=\displaystyle\sum_{i=1}^m x_i^k,
   c_k=\displaystyle\sum_{i=1}^my_ix_i^k\), then
\begin{equation*}
\begin{pmatrix}
b_{0+0} & \dots & b_{0+n}\\
\vdots & \ddots&\vdots\\
b_{n+0} & \dots & b_{n+n}\\
\end{pmatrix}
\begin{pmatrix}
a_0\\
\vdots\\
a_n
\end{pmatrix}=
\begin{pmatrix}
c_0\\
\vdots\\
c_n\\
\end{pmatrix}
\end{equation*}
\subsection{8.2 orthogonal polynomials and least squares approximation}
\label{sec:org9c9d6c2}
\begin{theorem}
If $\varphi_j(x)$ is a polynomial of degree $j$ for each $j=0,\dots,n$, then 
$\{\varphi_0(x),\dots,\varphi_n(x)\}$ is \textcolor{red}{linearly independent} on
any interval $[a,b]$
\end{theorem}

\begin{theorem}
Let $\prod_n$ be the set of all polynomials of degree at most n. If
$\{\varphi_0(x),\dots,\varphi_n(x)\}$ is a collection of linearly independent
polynomials in $\prod_n$ then any polynomials in $\prod_n$ can be written
uniquely as a linear combination of $\{\varphi_0(x),\dots,\varphi_n(x)\}$
\end{theorem}

\begin{definition}
For a general linear independent set of functions $\{\varphi_0(x),\dots,\varphi_n(x)\}$,
a linear combination of $\{\varphi_0(x),\dots,\varphi_n(x)\}$.
$P(x)=\displaystyle\sum_{j=0}^n\alpha_j\varphi_j(x)$ is called a
\textcolor{red}{generalized polynomial} 
\end{definition}


Weight function
\begin{align*}
&E=\sum w_i[P(x_i)-y_i]^2\\
&E=\int_a^bw(x)[P(x)-f(x)]^2dx
\end{align*}
\begin{equation*}
\norm{P(x)-f(x)}_{2,w}^2=\sum w_ie_i^2=\bl{e}^TW\bl{e}
\end{equation*}
where
\begin{equation*}
W=
\begin{pmatrix}
w_1 & & \\
 & \ddots & \\
 & & w_n\\
\end{pmatrix}
\end{equation*}


The \textbf{general least squares approximation problem}. \(E\) is minimized


\textbf{Inner product} and \textbf{norm}
\begin{equation*}
(f,g)=
\begin{cases}
\displaystyle\sum_{i=1}^m w_if(x_i)g(x_i)\\
\int_a^bw(x)f(x)g(x)dx
\end{cases}
\end{equation*}
It can be shown that \((f,g)\) is an \textbf{inner product} and \(\norm{f}=\sqrt{(f,f)}\)
is a \textbf{norm}


Hence, The general least squares approximation problem is to find a
generalized polynomial \(P(x)\) such that \(E=(P-y,P-y)=\norm{P-y}^2\) is
minimized. 


Let \(P(x)=a_0\phi_0(x)+\dots+a_n\phi_n(x)\). 
\(\frac{\partial E}{\partial a_k}=0\Longrightarrow
   \displaystyle\sum_{j=0}^n(\phi_k,\phi_j)a_j=(\phi_k,f)\).
\begin{equation*}
\begin{pmatrix}
&&\\
&b_{ij}=(\phi_i,\phi_j)&\\
&&
\end{pmatrix}
\begin{pmatrix}
a_0\\
\vdots\\
a_n
\end{pmatrix}
=
\begin{pmatrix}
(\phi_0,f)\\
\dots\\
(\phi_n,f)
\end{pmatrix}=\vec{c}
\end{equation*}


Example. When approximating \(f(x)\in C[0,1]\) with \(\phi_j(x)=x^j\) and
\(w(x)=1\), then
\begin{equation*}
(\phi_i,\phi_j)=\int_0^1x^ix^jdx=\frac{1}{i+j+1}
\end{equation*}
Hilbert matrix.


Improvement: Find a general linear independent set of functions s.t. any pair
is \textbf{orthogonal}, then the matrix will be diagonal. And
\begin{equation*}
a_k=\frac{(\phi_k,f)}{(\phi_k,\phi_k)}
\end{equation*}


\textbf{Construction}
\begin{theorem}
the set of polynomial functions defined in the following way is orthogonal on [a,b]
w.r.t. weight function $w$
\begin{align*}
\phi_0(x)&=1\\
\phi_1(x)&=x-B_1\\
\phi_k(x)&=(x-B_k)\phi_{k-1}(x)-C_k\phi_{k-2}(x)\\
B_k&=\frac{(x\phi_{k-1},\phi_{k-1})}{(\phi_{k-1},\phi_{k-1})}\\
C_k&=\frac{(x\phi_{k-1},\phi_{k-2})}{(\phi_{k-2},\phi_{k-2})}
\end{align*}
\end{theorem}


Example. Approximate
\[
\begin{pmatrix}
 x & 1 & 2 & 3 & 4 \\
 y & 4 & 10 & 18 & 26 \\
\end{pmatrix}
\]

with \(y=a_0+a_1x+a_2x^2, w=1\)


Solution. \(y=a_0\phi_0(x)+a_1\phi_1(x)+a_2\phi_2(x)\). \(\phi_0(x)=1\)
\subsection{8.3 Chebyshev polynomials and economization of power series}
\label{sec:org6774070}
Minimize \(\norm{P-y}_\infty\), \textcolor{red}{minimax problem} 

\begin{enumerate}
\item Find a polynomial \(P_n(x)\) of degree n s.t. \(\norm{P_n-f}_\infty\) is
minimized

\begin{definition}
If $P(x_0)-f(x_0)=\pm\norm{P-f}_\infty, \textcolor{red}{x_0}$ is called a
$(\pm)$ \textcolor{red}{deviation point}
\end{definition}

We can estimate the features of the polynomial
\begin{enumerate}
\item If \(f\in C[a,b]\) and f is \textcolor{red}{not} a polynomial of degree
n, then there exists a \textcolor{red}{unique} polynomial \(P_n(x)\) s.t. 
\(\norm{P_n-f}_\infty\) is minimized
\item \(P_n(x)\) exists, and must have both + and - deviation points
\item \begin{theorem}{Chebyshev Theorem }
$P_n(x)$ minimizes $\norm{P_n-f}\Longleftrightarrow P_n(x)$ has at least
\textcolor{red}{n+2} alternating + and - deviation points w.r.t. $f$.
That is, there exists a set of points $a\le t_1<\dots<t_{n+2}\le b$
s.t. 
\begin{equation*}
P_n(t_k)-f(t_k)=\pm(-1)^k\norm{P_n-f}_\infty
\end{equation*}
\end{theorem}

The set \(\{t_k\}\) is called the \textcolor{red}{Chebyshev alternating
sequence}
\end{enumerate}

\item Determine the interpolating points \(\{x_0,\dots,x_n\}\) s.t. \(P_n(x)\)
minimizes the remainder

\begin{equation*}
\abs{P_n(x)-f(x)}=
\abs{R_n(x)}=\abs{\frac{f^{(n+1)}(\xi_x)}{(n+1)!}\displaystyle\prod_{i=0}^n(x-x_i)}
\end{equation*}

2.1 Find \(\{x_1,\dots,x_n\}\) s.t. \(\norm{\omega_n}_\infty\) is minimized on
\([-1,1]\), where \(\omega_n(x)=\displaystyle\prod_{i=1}^n(x-x_i)\).

Since \(\omega_n(x)=x^n-P_{n-1}(x)\), the problem becomes to
\item Find a polynomial \(P_{n-1}(x)\) s.t. \(\norm{x^n-P_{n-1}(x)}_\infty\) is
minimized on \([-1,1]\)
\end{enumerate}


\textbf{Chebyshev polynomials}. Consider the \(n+1\) extreme values of \(\cos(n\theta)\)
on \([0,\pi]\).

Let \(x=\cos(\theta)\), then \(x\in[-1,1]\), \(T_n(x)=\cos(n\theta)=
   \cos(n\cdot \arccos x)\) is called the \textcolor{red}{Chebyshev polynomial}.

Properties:
\begin{enumerate}
\item \(t_k=\cos(\frac{k}{n}\pi), k=0,\dots,n,
      T_n(t_k)=(-1)^k\norm{T_n(x)}_\infty\)
\item \(T_n(x)\) has \(n\) roots \(x_k=\cos(\frac{2k-1}{2n}\pi), k=1,\dots,n\)
\item \(T_n\) has recurrence relation
\begin{equation*}
T_0(x)=1,T_1(x)=x,T_{n+1}(x)=2xT_n(x)-T_{n-1}(x)
\end{equation*}
\item \(\{T_0(x),T_1(x),\dots\}\) are orthogonal on \([-1,1]\) w.r.t. weight
function \(w(x)=1/\sqrt{1-x^2}\)
\begin{equation*}
(T_n,T_m)=\int_{-1}^1\frac{T_n(x)T_m(x)}{\sqrt{1-x^2}}dx=
\begin{cases}
0 & n\neq m\\
\pi & n=m=0\\
\pi/2&n=m\neq 0\\
\end{cases}
\end{equation*}
\end{enumerate}


\(w_n(x)=x^n-P_{n-1}(x)=T_n(x)/2^{n-1}\). Let \(\wt{\prod}\) =\{monic polynomials
of degree n\}. 
\begin{equation*}
\min_{w_n\in\wt{\prod}}\norm{w_n}_\infty=\norm{\frac{1}
{2^{n-1}}T_n(x)}_\infty=\frac{1}{2^{n-1}}
\end{equation*}
\begin{equation*}
\abs{P_n(x)-f(x)}=
\abs{R_n(x)}=\abs{\frac{f^{(n+1)}(\xi_x)}{(n+1)!}\displaystyle\prod_{i=0}^n(x-x_i)}
\end{equation*}
Take the \(n+1\) roots of \(T_{n+1}(x)\) as the interpolating points, then the
interpolating polynomial \(P_n(x)\) assumes the minimum upper bound of the
absolute error \(\frac{M}{2^n(n+1)!}\)


\textbf{Economization of power series}. Given \(P_n(x)\approx f(x)\), economization of
power series is to reduce the degree of polynomial with a \textbf{minimal loss of
accuracy} 


Consider approximating an arbitrary n-th degree polynomial
\begin{equation*}
P_n(x)=a_nx^n+a_{n-1}x^{n-1}+\dots+a_1x+a_0
\end{equation*}
with a polynomial \(P_{n-1}(x)\) by removing an n-th degree polynomial \(Q_n(x)\)
that has the coefficient \(a_n\) for \(x^n\). Then
\begin{equation*}
\max_{[-1,1]}\abs{f(x)-P_{n-1}(x)}\le\max_{[-1,1]}\abs{f(x)-P_n(x)}+
\max_{[-1,1]}\abs{Q_n(x)}
\end{equation*}
To minimize the loss of accuracy, \(Q_n(x)=a_n\frac{T_n(x)}{2^{n-1}}\)


Example. The 4-th order Taylor polynomial for \(f(x)=e^x\) on \([-1,1]\) is

\begin{equation*}
P_4=1+x+\frac{x^2}{2}+\frac{x^3}{6}+\frac{x^4}{24}
\end{equation*}
The upper bound of the truncation error is 
\(\abs{R_4(x)}\le\frac{e}{5!}\abs{x^5}\approx0.023\)

solution. \(T_4=8x^4-8x^2+1, Q_4\)
\section{chap9 Approximating Eigenvalues}
\label{sec:org629b4bb}
\subsection{9.3 the power method}
\label{sec:orgabf7368}
\textbf{the original method}
Assumptions: A is an \(n\times n\) matrix with eigenvalues satisfying
\(|\lambda_1|>|\lambda_2|\ge\dots\ge|\lambda_n|\ge 0\)

\begin{align*}
&\bl{x}^{(0)}=\displaystyle\sum_{j=1}^{n}\beta_j\bl{v}_j,\quad\beta_1\neq 0\\
&\bl{x}^{(1)}=A\bl{x}^{(0)}=\displaystyle\sum_{j=1}^n\beta_j\lambda_j\bl{v}_j\\
&\bl{x}^{(2)}=A\bl{x}^{(1)}=\displaystyle\sum_{j=1}^n\beta_j\lambda_j^2\bl{v}_j\\
&\dots\\
&\bl{x}^{(k)}\approx\lambda_1^k\beta_1\bl{v}_1, \quad \lambda_1\approx
\frac{\bl{x}^{(k)}_i}{\bl{x}^{(k-1)}_i}
\end{align*}

\textbf{Normalization}. Suppose \(||\bl{x}||_\infty=1\). Let
\(||\bl{x}^{(k)}||_\infty=|x_{p_k}^{(k)}|\).Then \(\bl{u}^{(k-1)}=
   \frac{\bl{x}^{(k-1)}}{|x_{p_{k-1}}^{(k-1)}|}\) and
\(\bl{x}^{(k)}=A\bl{u}^{(k-1)}\).
Then \(\bl{u}^{(k)}= \frac{\bl{x}^{(k)}}{|x_{p_k}^{(k)}|}\to \bl{v}_1\).
\(\lambda_1\approx
   \frac{\bl{x}_i^{(k)}}{\bl{u}_i^{(k-1)}}=\bl{x}_{p_{k-1}}^{(k)}\)

Note:
\begin{enumerate}
\item the method works for \textbf{multiple} eigenvalues
\(\lambda_1=\lambda_2=\dots=\lambda_r\)
\item the method fails to converge if \(\lambda_1=-\lambda_2\)
\item Aitken's \(\Delta^2\) can be used
\end{enumerate}


\textbf{Rate of convergence}. \(\bl{x}^{(k)}=A\bl{x}^{(k-1)}=\lambda_1^k
   \displaystyle\sum_{j=1}^n\beta_j(\frac{\lambda_j}{\lambda_1})^k\bl{v}_j\).
Make \(|\lambda_2/\lambda_1|\) as small as possible.
Assume \(\lambda_1>\lambda_2\ge\dots\ge\lambda_n, |\lambda_2|>|\lambda_n|\).
Let \(B=A-pI\), then \(|\lambda I-A|=|\lambda I-(B+pI)|=|(\lambda-p)I-B|\).
Hence \(\lambda_A-p=\lambda_B\). Since  \(\frac{|\lambda_2-p|}{|\lambda_1-p|}<
   \frac{|\lambda_2|}{|\lambda_1|}\) . The iteration is fast


\textbf{Inverse power method}. If A has
\(|\lambda_1|\ge|\lambda_2|\ge\dots>|\lambda_n|\), then \(A^{-1}\) has
\(|\frac{1}{\lambda_n}|>| \frac{1}{\lambda_{n-1}}|\ge\dots\ge|
   \frac{1}{\lambda_1}|\) 
\end{document}